#Divert R output to a log file, replacing any existing file of that name.
#FALSE is spelled out because the shorthand F is an ordinary variable that can be reassigned.
sink("isolationindexmeasures-psrm-log.txt",append=FALSE,type="output")

library(plyr)
library(tidyr)
library(xtable)

########
###PARTISAN SEGREGATION - OVERALL/BY RESPONDENT TYPE/BY CONTENT TYPE
########

#Unadjusted partisan isolation index.
#  dataframe: visit-level data with columns domain, weight and rep.partisanship
#             (rows with rep.partisanship 1 are counted as Republican, 0 as Democrat, NA as unaffiliated).
#  variable:  name of the column used to filter rows; not used when category is 'all'.
#  category:  value of `variable` to keep, or 'all' to keep every row.
#Returns a single number: the Republican-visit-weighted average of the domains' Republican
#audience share minus the Democrat-visit-weighted average of the same share.
partisan.segregation.unadjusted <- function(dataframe,variable,category){
	require('tidyr')
	require('plyr')

	#Keep everything for 'all'; otherwise only rows whose `variable` column equals `category`
	if(category=='all'){
		visits <- dataframe
	}
	else{
		visits <- dataframe[which(dataframe[,variable]==category),]
	}

	#Weighted visit totals per domain, split by the visitor's party
	by.domain <- ddply(visits,.(domain),summarise,rep.count=sum(weight[which(rep.partisanship==1)],na.rm=TRUE),dem.count=sum(weight[which(rep.partisanship==0)],na.rm=TRUE),neutral.count=sum(weight[which(is.na(rep.partisanship))],na.rm=TRUE))
	reps <- by.domain$rep.count
	dems <- by.domain$dem.count
	unaffiliated <- by.domain$neutral.count

	#Split each domain's unaffiliated weight between the parties in proportion to its observed partisan mix
	rep.fraction <- reps/(reps+dems)
	rep.total <- reps + unaffiliated*rep.fraction
	dem.total <- dems + unaffiliated*(1-rep.fraction)

	#Republican share of each domain's audience after the reallocation
	rep.domain.share <- rep.total/(rep.total+dem.total)

	#Share of each party's total visits that goes to each domain
	#(domains with no partisan visits produce NaN and are dropped by na.rm=TRUE)
	rep.visit.share <- rep.total/sum(rep.total,na.rm=TRUE)
	dem.visit.share <- dem.total/sum(dem.total,na.rm=TRUE)

	sum(rep.visit.share*rep.domain.share,na.rm=TRUE) - sum(dem.visit.share*rep.domain.share,na.rm=TRUE)
}

#Adjusted partisan isolation index.
#  dataframe: visit-level data with columns caseid, domain, weight and rep.partisanship
#             (1 counted as Republican, 0 as Democrat, NA as unaffiliated).
#  variable:  name of the column used to filter rows; not used when category is 'all'.
#  category:  value of `variable` to keep, or 'all' to keep every row.
#Each respondent's own weight is removed from the domain audience they are compared with
#before the Republican audience share is averaged. Returns a single number
#(Republicans' average exposure to Republicans minus Democrats' average exposure to Republicans).
partisan.segregation.adjusted <- function(dataframe,variable,category){
	require('tidyr')
	require('plyr')

	#Keep everything for 'all'; otherwise only rows whose `variable` column equals `category`
	if(category=='all'){
		visits <- dataframe
	}
	else{
		visits <- dataframe[which(dataframe[,variable]==category),]
	}

	#Total weight of each respondent's visits to each domain
	person.domain <- ddply(visits,.(caseid,rep.partisanship,domain),summarise,person.level.weight=sum(weight,na.rm=TRUE))

	#Domain-level weighted visit counts by party
	domain.totals <- ddply(visits,.(domain),summarise,rep.count=sum(weight[which(rep.partisanship==1)],na.rm=TRUE),dem.count=sum(weight[which(rep.partisanship==0)],na.rm=TRUE),neutral.count=sum(weight[which(is.na(rep.partisanship))],na.rm=TRUE))

	#Fraction of each domain's weight that comes from respondents with a coded party;
	#dividing by it scales the partisan counts up to the domain's full weight
	partisan.coverage <- (domain.totals$rep.count+domain.totals$dem.count)/(domain.totals$rep.count+domain.totals$dem.count+domain.totals$neutral.count)
	domain.totals$rep.count.2 <- domain.totals$rep.count/partisan.coverage
	domain.totals$dem.count.2 <- domain.totals$dem.count/partisan.coverage

	#Attach the domain-level counts to every person-domain row and drop unaffiliated respondents
	rows <- merge(person.domain,domain.totals,by=c('domain'),all.x=TRUE)
	rows <- subset(rows,!is.na(rows$rep.partisanship))

	rows$total.count.2 <- rows$rep.count.2 + rows$dem.count.2

	#Scale the respondent's own weight up by the same factor as the domain counts
	rows$respondent.weight <- rows$person.level.weight/((rows$rep.count + rows$dem.count)/(rows$rep.count + rows$dem.count + rows$neutral.count))

	#Total scaled weight of each party within each domain
	party.totals <- ddply(rows,.(domain,rep.partisanship),summarise,domain.party.weights=sum(respondent.weight,na.rm=TRUE))
	rows <- merge(rows,party.totals,by=c('domain','rep.partisanship'),all.x=TRUE)

	#Row positions for each party (taken after the merge, which reorders rows)
	is.rep <- which(rows$rep.partisanship==1)
	is.dem <- which(rows$rep.partisanship==0)
	rep.rows <- rows[is.rep,]
	dem.rows <- rows[is.dem,]

	#Respondent's share of their own party's weight at the domain
	rows$relative.weight <- NA
	rows$relative.weight[is.rep] <- rep.rows$respondent.weight/rep.rows$domain.party.weights
	rows$relative.weight[is.dem] <- dem.rows$respondent.weight/dem.rows$domain.party.weights
	rep.rows <- rows[is.rep,]
	dem.rows <- rows[is.dem,]

	#Republican audience share seen by the respondent with their own weight removed:
	#Republicans are removed from numerator and denominator, Democrats from the denominator only
	rows$final <- NA
	rows$final[is.rep] <- rep.rows$relative.weight*((rep.rows$rep.count.2 - rep.rows$respondent.weight)/(rep.rows$total.count.2-rep.rows$respondent.weight))
	rows$final[is.dem] <- dem.rows$relative.weight*((dem.rows$rep.count.2)/(dem.rows$total.count.2-dem.rows$respondent.weight))

	#One value per domain and party, reshaped to one row per domain
	#(the renaming below relies on spread() returning the 0 column before the 1 column)
	exposure <- ddply(rows,.(domain,rep.partisanship),summarise,final.weight=sum(final,na.rm=TRUE))
	exposure.wide <- spread(exposure,key=rep.partisanship,value=final.weight)
	names(exposure.wide) <- c('domain','dem.share','rep.share')

	#Combine with the domain-level measures and weight by each party's share of visits
	domain.totals <- merge(domain.totals,exposure.wide,by=c('domain'),all.x=TRUE)
	rep.audience.share <- domain.totals$rep.count.2/sum(domain.totals$rep.count.2,na.rm=TRUE)
	dem.audience.share <- domain.totals$dem.count.2/sum(domain.totals$dem.count.2,na.rm=TRUE)

	sum(domain.totals$rep.share*rep.audience.share,na.rm=TRUE) - sum(domain.totals$dem.share*dem.audience.share,na.rm=TRUE)
}

###
###All Web Visits
###

#Load the workspace file; the code below expects it to provide the data frame all.visits.wparty
load('all.visits.wparty.RData')
#The isolation functions read their weights from a column named `weight`
all.visits.wparty$weight <- all.visits.wparty$weight_pulse

#Isolation index over every row ('all' means no filtering, so the `variable` argument is NA)
all.visits.party.unadjusted <- partisan.segregation.unadjusted(all.visits.wparty,NA,'all')
all.visits.party.adjusted <- partisan.segregation.adjusted(all.visits.wparty,NA,'all')

#Among Subsets of Domain-Level Visits
#Split on the political.domain flag
political.domain.broad <- subset(all.visits.wparty,all.visits.wparty$political.domain==1)
nonpolitical.domain.broad <- subset(all.visits.wparty,all.visits.wparty$political.domain==0)

#Split on the political.nonaggregator and aggregator flags
political.domain.nonaggregator <- subset(all.visits.wparty,all.visits.wparty$political.nonaggregator==1)
nonpolitical.domain.nonaggregator <- subset(all.visits.wparty,all.visits.wparty$political.nonaggregator==0 & all.visits.wparty$aggregator==0)
aggregator <- subset(all.visits.wparty,all.visits.wparty$aggregator==1)

#Unadjusted and adjusted index for each subset
political.broad.party.unadjusted <- partisan.segregation.unadjusted(political.domain.broad,NA,'all')
political.broad.party.adjusted <- partisan.segregation.adjusted(political.domain.broad,NA,'all')

nonpolitical.broad.party.unadjusted <- partisan.segregation.unadjusted(nonpolitical.domain.broad,NA,'all')
nonpolitical.broad.party.adjusted <- partisan.segregation.adjusted(nonpolitical.domain.broad,NA,'all')

political.nonaggregator.party.unadjusted <- partisan.segregation.unadjusted(political.domain.nonaggregator,NA,'all')
political.nonaggregator.party.adjusted <- partisan.segregation.adjusted(political.domain.nonaggregator,NA,'all')

nonpolitical.nonaggregator.party.unadjusted <- partisan.segregation.unadjusted(nonpolitical.domain.nonaggregator,NA,'all')
nonpolitical.nonaggregator.party.adjusted <- partisan.segregation.adjusted(nonpolitical.domain.nonaggregator,NA,'all')

aggregator.party.unadjusted <- partisan.segregation.unadjusted(aggregator,NA,'all')
aggregator.party.adjusted <- partisan.segregation.adjusted(aggregator,NA,'all')

#Optional cleanup of the subset frames (left disabled)
#rm(aggregator,nonpolitical.domain.nonaggregator,political.domain.nonaggregator,nonpolitical.domain.broad,political.domain.broad)

###
###Political News Domain Visits
###
#Load the workspace file; the code below expects it to provide the data frame url.survey.wlabels
load('url.survey.wlabels.RData')

#De-duplicate on the listed columns and rename weight_pulse to `weight`,
#the column name the isolation functions read
full.frame <- unique(url.survey.wlabels[,c('caseid','date','domain','rep.partisanship','high.interest','high.interest.visit','weight_pulse')])
names(full.frame) <- c('caseid','date','domain','rep.partisanship','high.interest','high.interest.visit','weight')
#alternative.weights <- merge(full.frame,updated.weight.frame,by=c('caseid'),all.x=TRUE)
#Same columns, but using weights.winterest as the weight
alternative.weights <- unique(url.survey.wlabels[,c('caseid','date','domain','rep.partisanship','high.interest','high.interest.visit','weights.winterest')])
alternative.weights$weight <- alternative.weights$weights.winterest

#Two fixed domain lists; same.ten.views holds 11 strings because both 'bbc.co.uk' and 'bbc.com' are listed
same.ten.views <- c('yahoo.com/news','aol.com/article','bbc.co.uk','bbc.com','cnn.com','msnbc.com','foxnews.com','nytimes.com','huffingtonpost.com','drudgereport.com','usatoday.com')
top.ten.views <- c('yahoo.com/news','msn.com/en-us/news','huffingtonpost.com','washingtonpost.com','nytimes.com','cnn.com','foxnews.com','dailykos.com','fivethirtyeight.com','drudgereport.com')

#Subsets by domain list and by the two interest flags
full.frame.top.ten <- subset(full.frame,full.frame$domain %in% top.ten.views)
full.frame.same.ten <- subset(full.frame,full.frame$domain %in% same.ten.views)
low.interest <- subset(full.frame,full.frame$high.interest==0)
low.interest.visit <- subset(full.frame,full.frame$high.interest.visit==0)
high.interest <- subset(full.frame,full.frame$high.interest==1)
high.interest.visit <- subset(full.frame,full.frame$high.interest.visit==1)

#Unadjusted and adjusted index for the full frame and each subset
politicaldomains.party.unadjusted.all <- partisan.segregation.unadjusted(full.frame,NA,'all')
politicaldomains.party.adjusted.all <- partisan.segregation.adjusted(full.frame,NA,'all')

politicaldomains.party.unadjusted.topten <- partisan.segregation.unadjusted(full.frame.top.ten,NA,'all')
politicaldomains.party.adjusted.topten <- partisan.segregation.adjusted(full.frame.top.ten,NA,'all')

politicaldomains.party.unadjusted.sameten <- partisan.segregation.unadjusted(full.frame.same.ten,NA,'all')
politicaldomains.party.adjusted.sameten <- partisan.segregation.adjusted(full.frame.same.ten,NA,'all')

politicaldomains.party.unadjusted.low <- partisan.segregation.unadjusted(low.interest,NA,'all')
politicaldomains.party.adjusted.low <- partisan.segregation.adjusted(low.interest,NA,'all')

politicaldomains.party.unadjusted.high <- partisan.segregation.unadjusted(high.interest,NA,'all')
politicaldomains.party.adjusted.high <- partisan.segregation.adjusted(high.interest,NA,'all')

politicaldomains.party.unadjusted.low.visit <- partisan.segregation.unadjusted(low.interest.visit,NA,'all')
politicaldomains.party.adjusted.low.visit <- partisan.segregation.adjusted(low.interest.visit,NA,'all')

politicaldomains.party.unadjusted.high.visit <- partisan.segregation.unadjusted(high.interest.visit,NA,'all')
politicaldomains.party.adjusted.high.visit <- partisan.segregation.adjusted(high.interest.visit,NA,'all')

#Table A2
#Adjusted estimates only, rounded to two digits; the labels pair high.interest with 'Survey Based'
#and high.interest.visit with 'Traffic Based'
names.a2 <- c('2016 (Low Interest-Survey Based)','2016 (Low Interest-Traffic Based)','2016 (High Interest-Survey Based)','2016 (High Interest-Traffic Based)')
values.a2 <- round(x=c(politicaldomains.party.adjusted.low, politicaldomains.party.adjusted.low.visit,politicaldomains.party.adjusted.high, politicaldomains.party.adjusted.high.visit),digits=2)
a2.out <- cbind.data.frame(names.a2,values.a2)
names(a2.out) <- c('Trait','Adjusted Isolation Index')
xtable(a2.out,caption='Partisan Isolation Index By Political Interest')

#Using Alternative Raking Weights that Incorporate Political Interest
politicaldomains.party.unadjusted.all.alternativeweights <- partisan.segregation.unadjusted(alternative.weights,NA,'all')
politicaldomains.party.adjusted.all.alternativeweights <- partisan.segregation.adjusted(alternative.weights,NA,'all')

#Drop the subset frames; full.frame, alternative.weights and the two domain lists are kept
rm(full.frame.top.ten,full.frame.same.ten,low.interest,low.interest.visit,high.interest, high.interest.visit)

###
###By News Topic
### 

#Rows with a topic label (high.level) and a non-missing weight, de-duplicated on the listed columns
url.survey.wlabels.topic <- unique(url.survey.wlabels[,c('caseid','date','domain','rep.partisanship','high.level','weight_pulse')])
url.survey.wlabels.topic <- subset(url.survey.wlabels.topic,!is.na(url.survey.wlabels.topic$high.level) & !is.na(url.survey.wlabels.topic$weight_pulse))
names(url.survey.wlabels.topic) <- c('caseid','date','domain','rep.partisanship','high.level','weight')

#All rows (labeled or not) belonging to respondents who appear in the topic frame, with non-missing weight
url.survey.wlabels.overall <- unique(url.survey.wlabels[,c('caseid','date','domain','rep.partisanship','weight_pulse')])
names(url.survey.wlabels.overall) <- c('caseid','date','domain','rep.partisanship','weight')
url.survey.wlabels.overall <- subset(url.survey.wlabels.overall,url.survey.wlabels.overall$caseid %in% unique(url.survey.wlabels.topic$caseid))
url.survey.wlabels.overall <- subset(url.survey.wlabels.overall,!is.na(url.survey.wlabels.overall$weight))

#Labeled rows only; high.level is dropped before de-duplication, so rows that differ
#only in their topic label collapse into one
url.survey.wlabels.election <- subset(url.survey.wlabels,!is.na(url.survey.wlabels$high.level) & !is.na(url.survey.wlabels$weight_pulse))
url.survey.wlabels.election <- unique(url.survey.wlabels.election[,c('caseid','date','domain','rep.partisanship','weight_pulse')])
names(url.survey.wlabels.election) <- c('caseid','date','domain','rep.partisanship','weight')

labeledarticles.party.unadjusted <- partisan.segregation.unadjusted(url.survey.wlabels.overall,NA,'all')
labeledarticles.party.adjusted <- partisan.segregation.adjusted(url.survey.wlabels.overall,NA,'all')

electionarticles.party.unadjusted <- partisan.segregation.unadjusted(url.survey.wlabels.election,NA,'all')
electionarticles.party.adjusted <- partisan.segregation.adjusted(url.survey.wlabels.election,NA,'all')

#One pair of estimates per topic: the functions keep only rows whose high.level equals the given label
clintonscandal.party.unadjusted <- partisan.segregation.unadjusted(url.survey.wlabels.topic,'high.level','Clinton Scandal')
clintonscandal.party.adjusted <- partisan.segregation.adjusted(url.survey.wlabels.topic,'high.level','Clinton Scandal')

trumpscandal.party.unadjusted <- partisan.segregation.unadjusted(url.survey.wlabels.topic,'high.level','Trump Scandal')
trumpscandal.party.adjusted <- partisan.segregation.adjusted(url.survey.wlabels.topic,'high.level','Trump Scandal')

issue.party.unadjusted <- partisan.segregation.unadjusted(url.survey.wlabels.topic,'high.level','Issue')
issue.party.adjusted <- partisan.segregation.adjusted(url.survey.wlabels.topic,'high.level','Issue')

strategy.party.unadjusted <- partisan.segregation.unadjusted(url.survey.wlabels.topic,'high.level','Strategy')
strategy.party.adjusted <- partisan.segregation.adjusted(url.survey.wlabels.topic,'high.level','Strategy')

event.party.unadjusted <- partisan.segregation.unadjusted(url.survey.wlabels.topic,'high.level','Event')
event.party.adjusted <- partisan.segregation.adjusted(url.survey.wlabels.topic,'high.level','Event')

###
###By Topic Slant
###

#Rows with a slant rating, de-duplicated on the listed columns; weight_pulse is renamed to `weight`
url.survey.wlabels.slant <- unique(url.survey.wlabels[,c('caseid','date','domain','rep.partisanship','slant.rating','weight_pulse')])
url.survey.wlabels.slant <- url.survey.wlabels.slant[which(!is.na(url.survey.wlabels.slant$slant.rating)),]
names(url.survey.wlabels.slant) <- c('caseid','date','domain','rep.partisanship','slant.rating','weight')

#One pair of estimates per slant rating
neutral.party.unadjusted <- partisan.segregation.unadjusted(url.survey.wlabels.slant,'slant.rating','Neutral')
neutral.party.adjusted <- partisan.segregation.adjusted(url.survey.wlabels.slant,'slant.rating','Neutral')

moderate.party.unadjusted <- partisan.segregation.unadjusted(url.survey.wlabels.slant,'slant.rating','Moderate')
moderate.party.adjusted <- partisan.segregation.adjusted(url.survey.wlabels.slant,'slant.rating','Moderate')

high.party.unadjusted <- partisan.segregation.unadjusted(url.survey.wlabels.slant,'slant.rating','High')
high.party.adjusted <- partisan.segregation.adjusted(url.survey.wlabels.slant,'slant.rating','High')

#Collect 15 estimates in a fixed order; this order must match the 15 row labels assigned below
#and the row positions used later when Table 2 picks rows 5, 7 and 6.
#The aggregator, non-political non-aggregator, labeled-article and event estimates are not included.
unadjusted.party.estimates <- c(all.visits.party.unadjusted,nonpolitical.broad.party.unadjusted,political.broad.party.unadjusted,political.nonaggregator.party.unadjusted,politicaldomains.party.unadjusted.all,politicaldomains.party.unadjusted.topten,politicaldomains.party.unadjusted.sameten,electionarticles.party.unadjusted,strategy.party.unadjusted,trumpscandal.party.unadjusted,clintonscandal.party.unadjusted,issue.party.unadjusted,neutral.party.unadjusted,moderate.party.unadjusted,high.party.unadjusted)

adjusted.party.estimates <- c(all.visits.party.adjusted,nonpolitical.broad.party.adjusted,political.broad.party.adjusted,political.nonaggregator.party.adjusted,politicaldomains.party.adjusted.all,politicaldomains.party.adjusted.topten,politicaldomains.party.adjusted.sameten,electionarticles.party.adjusted,strategy.party.adjusted,trumpscandal.party.adjusted,clintonscandal.party.adjusted,issue.party.adjusted,neutral.party.adjusted,moderate.party.adjusted,high.party.adjusted)

partisan.isolation.frame <- cbind.data.frame(unadjusted.party.estimates,adjusted.party.estimates)
partisan.isolation.frame$names <- c('All Web Traffic','All Non-Political Domains','Political Domains - Include All AOL/MSN/Yahoo Traffic','Political Domains - Exclude All AOL/MSN/Yahoo Traffic','Political Domains - Baseline','Political Domains-Top Ten','Political Domains-Same Ten','All Election Articles','Election Articles-Strategy','Election Articles-Trump Scandal','Election Articles-Clinton Scandal','Election Articles-Issue','Election Articles-Neutral Slant','Election Articles-Moderate Slant','Election Articles-High Slant')
#Put the label column first
partisan.isolation.frame <- partisan.isolation.frame[,c('names','unadjusted.party.estimates','adjusted.party.estimates')]

#Table D1 Here
xtable(partisan.isolation.frame)

####
#By News Outlet Level
####
#Rows flagged local==0, de-duplicated on the listed columns; weight_pulse is renamed to `weight`
url.survey.national <- subset(url.survey.wlabels,url.survey.wlabels$local==0)
url.survey.national <- unique(url.survey.national[,c('caseid','date','domain','rep.partisanship','local','weight_pulse')])
names(url.survey.national) <- c('caseid','date','domain','rep.partisanship','local','weight')

#Rows flagged local==1, prepared the same way
url.survey.local <- subset(url.survey.wlabels,url.survey.wlabels$local==1)
url.survey.local <- unique(url.survey.local[,c('caseid','date','domain','rep.partisanship','local','weight_pulse')])
names(url.survey.local) <- c('caseid','date','domain','rep.partisanship','local','weight')

#Adjusted index only; both values are printed
national.party.adjusted <- partisan.segregation.adjusted(url.survey.national,NA,'all')
local.party.adjusted <- partisan.segregation.adjusted(url.survey.local,NA,'all')
national.party.adjusted;local.party.adjusted

########
###Segregation by Other Variables
########
#Unadjusted isolation index for a binary respondent trait, computed over web domains.
#  dataframe:      either the name (character string) of a data frame reachable from this function --
#                  the calling convention used in this script -- or the data frame itself.
#                  It must contain the columns domain, weight and the trait column.
#  trait.variable: name of the trait column; rows coded 1 are the in-group, 0 the out-group,
#                  NA unclassified.
#Returns a single number: the in-group-visit-weighted average of the domains' in-group audience
#share minus the out-group-visit-weighted average of the same share.
trait.isolation.unadjusted <- function(dataframe,trait.variable){
	
	#Look the data up by name instead of pasting R code together and running it through eval(parse())
	visits <- if(is.character(dataframe)) get(dataframe) else dataframe
	if(!(trait.variable %in% names(visits))){
		stop("column '",trait.variable,"' not found in the supplied data")
	}
	#Copy the trait to a fixed column name so the summary below can be written as ordinary code
	visits$trait.value <- visits[[trait.variable]]
	
	#Domain-level weighted visit totals for in-group, out-group and unclassified respondents
	overall.out <- ddply(visits,.(domain),summarise,in.count=sum(weight[which(trait.value==1)],na.rm=TRUE),out.count=sum(weight[which(trait.value==0)],na.rm=TRUE),neutral.count=sum(weight[which(is.na(trait.value))],na.rm=TRUE))
	
	#Determine In Group and Out Group Shares of Domain level visits and Share of Group visits to these domains - Treat missing values as in G&S 2010
	#Unclassified weight is split between the groups in proportion to the domain's observed mix
	overall.out$in.share.1 <- overall.out$in.count/(overall.out$in.count+overall.out$out.count)
	overall.out$in.count.2 <- overall.out$neutral.count*overall.out$in.share.1
	overall.out$out.count.2 <- overall.out$neutral.count*(1-overall.out$in.share.1)
	overall.out$in.total.count <- overall.out$in.count + overall.out$in.count.2
	overall.out$out.total.count <- overall.out$out.count + overall.out$out.count.2

	#Share of each group's total visits going to each domain, and each domain's in-group share
	#(domains with no classified visits produce NaN and are dropped by na.rm=TRUE)
	overall.out$total.visits <- overall.out$in.total.count + overall.out$out.total.count
	total.in.visits <- sum(overall.out$in.total.count,na.rm=TRUE)
	total.out.visits <- sum(overall.out$out.total.count,na.rm=TRUE)
	overall.out$in.visit.share <- overall.out$in.total.count/total.in.visits
	overall.out$out.visit.share <- overall.out$out.total.count/total.out.visits
	overall.out$in.domain.share <- overall.out$in.total.count/overall.out$total.visits
	
	#Estimate and Return unadjusted isolation statistic
	visit.diff <- sum(overall.out$in.visit.share*overall.out$in.domain.share,na.rm=TRUE) - sum(overall.out$out.visit.share*overall.out$in.domain.share,na.rm=TRUE)
	return(visit.diff)
}

#Adjusted isolation index for a binary respondent trait, computed over web domains.
#  dataframe:      either the name (character string) of a data frame reachable from this function --
#                  the calling convention used in this script -- or the data frame itself.
#                  It must contain the columns caseid, domain, weight and the trait column.
#  trait.variable: name of the trait column; rows coded 1 are the in-group, 0 the out-group,
#                  NA unclassified.
#Each respondent's own weight is removed from the domain audience they are compared with.
#Returns a single number (in-group exposure to the in-group minus out-group exposure to the in-group).
trait.isolation.adjusted <- function(dataframe,trait.variable){
	
	#Look the data up by name instead of pasting R code together and running it through eval(parse())
	visits <- if(is.character(dataframe)) get(dataframe) else dataframe
	if(!(trait.variable %in% names(visits))){
		stop("column '",trait.variable,"' not found in the supplied data")
	}
	#Copy the trait to a fixed column name so the calls below can be written as ordinary code
	visits$trait.value <- visits[[trait.variable]]

	#Total weight of each respondent's visits to each domain
	person.domain.frame <- ddply(visits,.(caseid,domain,trait.value),summarise,person.level.weight=sum(weight,na.rm=TRUE))

	#Domain-level weighted visit totals for in-group, out-group and unclassified respondents
	overall.out <- ddply(visits,.(domain),summarise,in.count=sum(weight[which(trait.value==1)],na.rm=TRUE),out.count=sum(weight[which(trait.value==0)],na.rm=TRUE),neutral.count=sum(weight[which(is.na(trait.value))],na.rm=TRUE))
	
	#Scale the classified counts up to the domain's full weight
	overall.out$in.count.2 <- overall.out$in.count/((overall.out$in.count+overall.out$out.count)/(overall.out$in.count+overall.out$out.count+overall.out$neutral.count))
	overall.out$out.count.2 <- overall.out$out.count/((overall.out$in.count+overall.out$out.count)/(overall.out$in.count+overall.out$out.count+overall.out$neutral.count))
	
	#Merge Domain-Level Information to Person-Domain Frame and drop unclassified respondents
	out <- merge(person.domain.frame,overall.out,by=c('domain'),all.x=TRUE)
	out <- subset(out,!is.na(out$trait.value))
	
	out$total.count.2 <- out$in.count.2 + out$out.count.2
	
	#Upweight to account for respondents without a value on the trait
	out$respondent.weight <- out$person.level.weight/((out$in.count + out$out.count)/(out$in.count + out$out.count + out$neutral.count))
	#Total scaled weight of each group within each domain
	weight.term <- ddply(out,.(domain,trait.value),summarise,domain.trait.weights=sum(respondent.weight,na.rm=TRUE))
	out <- merge(out,weight.term,by=c('domain','trait.value'),all.x=TRUE)
	
	#Row positions for each group (taken after the merge, which reorders rows)
	in.rows <- which(out$trait.value==1)
	out.rows <- which(out$trait.value==0)
	
	#Determine the respondent's relative contribution to their own group's audience at the domain
	out$relative.weight <- NA
	out$relative.weight[in.rows] <- out$respondent.weight[in.rows]/out$domain.trait.weights[in.rows]
	out$relative.weight[out.rows] <- out$respondent.weight[out.rows]/out$domain.trait.weights[out.rows]
	
	#In-group audience share seen by the respondent with their own weight removed:
	#in-group respondents are removed from numerator and denominator, out-group from the denominator only
	out$final <- NA
	out$final[in.rows] <- out$relative.weight[in.rows]*((out$in.count.2[in.rows] - out$respondent.weight[in.rows])/(out$total.count.2[in.rows]-out$respondent.weight[in.rows]))
	out$final[out.rows] <- out$relative.weight[out.rows]*((out$in.count.2[out.rows])/(out$total.count.2[out.rows]-out$respondent.weight[out.rows]))
	
	#Domain-Group Measure of the Web Audience, reshaped to one row per domain
	#(the renaming below relies on spread() returning the 0 column before the 1 column)
	by.count.biascorrect <- ddply(out,.(domain,trait.value),summarise,final.weight=sum(final,na.rm=TRUE))
	final.weights <- spread(by.count.biascorrect,key=trait.value,value=final.weight)
	names(final.weights) <- c('domain','out.share','in.share')
	
	#Combine with Domain Level Measures
	overall.out <- merge(overall.out,final.weights,by=c('domain'),all.x=TRUE)
	overall.out$out.audience.share <- overall.out$out.count.2/sum(overall.out$out.count.2,na.rm=TRUE)
	overall.out$in.audience.share <- overall.out$in.count.2/sum(overall.out$in.count.2,na.rm=TRUE)

	#Final Statistic Comparing the Two Groups' Visit Patterns
	visit.diff <- sum(overall.out$in.share*overall.out$in.audience.share,na.rm=TRUE) - sum(overall.out$out.share*overall.out$out.audience.share,na.rm=TRUE)
	return(visit.diff)
}

#One de-duplicated frame per trait, each with weight_pulse renamed to `weight`
#(the trait functions read their weights from a column named `weight`)
race.frame <- unique(url.survey.wlabels[,c('caseid','date','domain','white','weight_pulse')])
names(race.frame) <- c('caseid','date','domain','white','weight')

gender.frame <- unique(url.survey.wlabels[,c('caseid','date','domain','female','weight_pulse')])
names(gender.frame) <- c('caseid','date','domain','female','weight')

education.frame <- unique(url.survey.wlabels[,c('caseid','date','domain','college.plus','weight_pulse')])
names(education.frame) <- c('caseid','date','domain','college.plus','weight')

ideology.frame <- unique(url.survey.wlabels[,c('caseid','date','domain','conservative','weight_pulse')])
names(ideology.frame) <- c('caseid','date','domain','conservative','weight')

#Ideology frame restricted to each of the two fixed domain lists
ideology.frame.top.ten <- subset(ideology.frame,ideology.frame$domain %in% top.ten.views)
ideology.frame.same.ten <- subset(ideology.frame,ideology.frame$domain %in% same.ten.views)

party.frame <- unique(url.survey.wlabels[,c('caseid','date','domain','rep.partisanship','weight_pulse')])
names(party.frame) <- c('caseid','date','domain','rep.partisanship','weight')

#Unadjusted Estimates
#The trait functions are called with the NAME of the frame (a string), not the frame itself
race.isolation.unadjusted <- trait.isolation.unadjusted('race.frame','white')
gender.isolation.unadjusted <- trait.isolation.unadjusted('gender.frame','female')
education.isolation.unadjusted <- trait.isolation.unadjusted('education.frame','college.plus')
ideology.isolation.unadjusted <- trait.isolation.unadjusted('ideology.frame','conservative')
party.isolation.unadjusted <- trait.isolation.unadjusted('party.frame','rep.partisanship')

#Adjusted Estimates
race.isolation.adjusted <- trait.isolation.adjusted('race.frame','white')
gender.isolation.adjusted <- trait.isolation.adjusted('gender.frame','female')
education.isolation.adjusted <- trait.isolation.adjusted('education.frame','college.plus')
ideology.isolation.adjusted <- trait.isolation.adjusted('ideology.frame','conservative')
party.isolation.adjusted <- trait.isolation.adjusted('party.frame','rep.partisanship')

#Order here must match the row labels assigned below; the party estimates are not included
other.traits.adjusted <- c(ideology.isolation.adjusted,education.isolation.adjusted,gender.isolation.adjusted,race.isolation.adjusted)
other.traits.unadjusted <- c(ideology.isolation.unadjusted,education.isolation.unadjusted,gender.isolation.unadjusted,race.isolation.unadjusted)

trait.frame <- cbind.data.frame(other.traits.unadjusted,other.traits.adjusted)
trait.frame$name <- c('Ideology','Education','Gender','Race')
#Put the label column first
trait.frame <- trait.frame[,c('name','other.traits.unadjusted','other.traits.adjusted')]

#Table D3
xtable(trait.frame)

#Additional Ideology Analysis
#Each estimate is computed from the frame its name refers to. The two frames were previously swapped,
#which put the same-ten and top-ten ideology values under the wrong labels in Table 2: the party
#rows c(5,7,6) are ordered baseline, same-ten, top-ten, and the ideology rows must follow that order.
ideology.isolation.adjusted.same.ten <- trait.isolation.adjusted('ideology.frame.same.ten','conservative')
ideology.isolation.adjusted.top.ten <- trait.isolation.adjusted('ideology.frame.top.ten','conservative')

#Table 2
#Rows 5, 7 and 6 of partisan.isolation.frame are 'Political Domains - Baseline', 'Political Domains-Same Ten'
#and 'Political Domains-Top Ten'; column 3 holds the adjusted estimates
party.isolation.output <- round(partisan.isolation.frame[c(5,7,6),3],digits=2)
ideology.isolation.output <- round(c(ideology.isolation.adjusted,ideology.isolation.adjusted.same.ten,ideology.isolation.adjusted.top.ten),digits=2)
#Mixing a number with '-' makes these character vectors, which is fine for a printed table
party.isolation.output.gs <- c(0.07,'-','-') #From Gentzkow and Shapiro 2011, Table VIII)
ideology.isolation.output.gs <- c(0.08,'-','-') #From Gentzkow and Shapiro 2011, Table VIII)
domain.list <- c('All News Domains','Top 10 (2009 list)','Top 10 (2016 list)','All News Domains','Top 10 (2009 list)','Top 10 (2016 list)')
variable.list <- c('Party','Party','Party','Ideology','Ideology','Ideology')

table2.output <- cbind.data.frame(domain.list,variable.list,c(party.isolation.output.gs,ideology.isolation.output.gs),c(party.isolation.output,ideology.isolation.output))
names(table2.output) <- c('Domains','Variable','2009 Isolation Index','2016 Isolation Index')
xtable(table2.output)

########
###Partisan Segregation by Geography
########
#Load the workspace file; the code below expects it to provide the data frame geo.survey
#with (at least) the columns fips, zip.final, rep.partisanship, weight_full and weight_pulse
load('geo.survey.RData')

#Unadjusted partisan isolation index across geographic units.
#  dataframe: respondent-level data with columns weight_full, rep.partisanship
#             (1 counted as Republican, 0 as Democrat, NA as unaffiliated) and the geography column.
#  geo.unit:  name (character string) of the geography column, e.g. "fips".
#Returns a single number: the Republican-weighted average of the units' Republican share minus
#the Democrat-weighted average of the same share.
geographic.partisan.isolation.unadjusted <- function(dataframe,geo.unit){
	
	if(!(geo.unit %in% names(dataframe))){
		stop("column '",geo.unit,"' not found in the supplied data")
	}
	#Work on a local copy whose geography column has a fixed name, so the summary can be written as
	#ordinary code instead of being pasted together and run through eval(parse())
	names(dataframe)[which(names(dataframe)==geo.unit)] <- 'geo.unit'

	#Weighted respondent totals per geographic unit, split by party
	overall.out <- ddply(dataframe,.(geo.unit),summarise,rep.count=sum(weight_full[which(rep.partisanship==1)],na.rm=TRUE),dem.count=sum(weight_full[which(rep.partisanship==0)],na.rm=TRUE),neutral.count=sum(weight_full[which(is.na(rep.partisanship))],na.rm=TRUE))
	
	#Republican share of each unit's partisan weight
	overall.out$rep.share <- overall.out$rep.count/(overall.out$rep.count+overall.out$dem.count)
	#Scale the partisan counts up to the unit's full weight
	overall.out$rep.count.2 <- overall.out$rep.count/((overall.out$rep.count+overall.out$dem.count)/(overall.out$rep.count+overall.out$dem.count+overall.out$neutral.count))
	overall.out$dem.count.2 <- overall.out$dem.count/((overall.out$rep.count+overall.out$dem.count)/(overall.out$rep.count+overall.out$dem.count+overall.out$neutral.count))
	#Share of each party's total weight located in each unit
	overall.out$rep.audience.share <- overall.out$rep.count.2/sum(overall.out$rep.count.2,na.rm=TRUE)
	overall.out$dem.audience.share <- overall.out$dem.count.2/sum(overall.out$dem.count.2,na.rm=TRUE)
	#Both terms average the SAME quantity (the unit's Republican share), once over Republicans and once
	#over Democrats. The second term previously used a non-existent dem.share column (NULL, so the sum
	#was 0) and the Republican weights, which made the function return Republican exposure alone.
	visit.diff <- sum(overall.out$rep.share*overall.out$rep.audience.share,na.rm=TRUE) - sum(overall.out$rep.share*overall.out$dem.audience.share,na.rm=TRUE)
	return(visit.diff)
}

#Adjusted partisan isolation index across geographic units.
#  dataframe: respondent-level data with columns weight_full, rep.partisanship
#             (1 counted as Republican, 0 as Democrat, NA as unaffiliated) and the geography column.
#  geo.unit:  name (character string) of the geography column, e.g. "fips".
#Each row's own weight is removed from the unit it is compared with.
#Returns a single number (Republicans' average exposure to Republicans minus Democrats' average
#exposure to Republicans).
geographic.partisan.isolation.adjusted <- function(dataframe,geo.unit){
	
	if(!(geo.unit %in% names(dataframe))){
		stop("column '",geo.unit,"' not found in the supplied data")
	}
	#Work on a local copy whose geography column has a fixed name
	names(dataframe)[which(names(dataframe)==geo.unit)] <- 'geo.unit'
	
	#Weighted respondent totals per geographic unit, split by party. This is computed from the data
	#that was passed in rather than by deparsing the argument and evaluating pasted code, which only
	#worked when the argument was a bare name that could be resolved from this function.
	overall.out <- ddply(dataframe,.(geo.unit),summarise,rep.count=sum(weight_full[which(rep.partisanship==1)],na.rm=TRUE),dem.count=sum(weight_full[which(rep.partisanship==0)],na.rm=TRUE),neutral.count=sum(weight_full[which(is.na(rep.partisanship))],na.rm=TRUE))

	#Scale the partisan counts up to the unit's full weight
	overall.out$rep.count.2 <- overall.out$rep.count/((overall.out$rep.count+overall.out$dem.count)/(overall.out$rep.count+overall.out$dem.count+overall.out$neutral.count))
	overall.out$dem.count.2 <- overall.out$dem.count/((overall.out$rep.count+overall.out$dem.count)/(overall.out$rep.count+overall.out$dem.count+overall.out$neutral.count))

	#Attach the unit-level counts to every respondent row and drop unaffiliated respondents
	geo.survey.inprogress <- merge(dataframe,overall.out,by=c('geo.unit'),all.x=TRUE)
	geo.survey.inprogress <- subset(geo.survey.inprogress,!is.na(geo.survey.inprogress$rep.partisanship))
	
	geo.survey.inprogress$total.count.2 <- geo.survey.inprogress$rep.count.2 + geo.survey.inprogress$dem.count.2
	
	#Scale the row's own weight up by the same factor as the unit counts
	geo.survey.inprogress$respondent.weight <- geo.survey.inprogress$weight_full/((geo.survey.inprogress$rep.count + geo.survey.inprogress$dem.count)/(geo.survey.inprogress$rep.count + geo.survey.inprogress$dem.count + geo.survey.inprogress$neutral.count))
	
	#Total scaled weight of each party within each unit
	weight.term <- ddply(geo.survey.inprogress,.(geo.unit,rep.partisanship),summarise,geo.ideology.weights=sum(respondent.weight,na.rm=TRUE))
	geo.survey.inprogress <- merge(geo.survey.inprogress,weight.term,by=c('geo.unit','rep.partisanship'),all.x=TRUE)

	#Row positions for each party (taken after the merge, which reorders rows)
	rep.rows <- which(geo.survey.inprogress$rep.partisanship==1)
	dem.rows <- which(geo.survey.inprogress$rep.partisanship==0)

	#Calculating w_ij terms separately for Republicans and Democrats
	geo.survey.inprogress$relative.weight <- NA
	geo.survey.inprogress$relative.weight[rep.rows] <- geo.survey.inprogress$respondent.weight[rep.rows]/geo.survey.inprogress$geo.ideology.weights[rep.rows]
	geo.survey.inprogress$relative.weight[dem.rows] <- geo.survey.inprogress$respondent.weight[dem.rows]/geo.survey.inprogress$geo.ideology.weights[dem.rows]

	#Republican share of the unit with the row's own weight removed:
	#Republicans are removed from numerator and denominator, Democrats from the denominator only
	geo.survey.inprogress$final <- NA
	geo.survey.inprogress$final[rep.rows] <- geo.survey.inprogress$relative.weight[rep.rows]*((geo.survey.inprogress$rep.count.2[rep.rows] - geo.survey.inprogress$respondent.weight[rep.rows])/(geo.survey.inprogress$total.count.2[rep.rows]-geo.survey.inprogress$respondent.weight[rep.rows]))
	geo.survey.inprogress$final[dem.rows] <- geo.survey.inprogress$relative.weight[dem.rows]*((geo.survey.inprogress$rep.count.2[dem.rows])/(geo.survey.inprogress$total.count.2[dem.rows]-geo.survey.inprogress$respondent.weight[dem.rows]))
	
	#One value per unit and party, reshaped to one row per unit
	#(the renaming below relies on spread() returning the 0 column before the 1 column)
	by.count.biascorrect <- ddply(geo.survey.inprogress,.(geo.unit,rep.partisanship),summarise,final.weight=sum(final,na.rm=TRUE))
	final.weights <- spread(by.count.biascorrect,key=rep.partisanship,value=final.weight)
	names(final.weights) <- c('geo.unit','dem.share','rep.share')
	
	#Combine with the unit-level measures and weight by each party's share of the total
	overall.out <- merge(overall.out,final.weights,by=c('geo.unit'),all.x=TRUE)
	overall.out$rep.audience.share <- overall.out$rep.count.2/sum(overall.out$rep.count.2,na.rm=TRUE)
	overall.out$dem.audience.share <- overall.out$dem.count.2/sum(overall.out$dem.count.2,na.rm=TRUE)
	visit.diff <- sum(overall.out$rep.share*overall.out$rep.audience.share,na.rm=TRUE) - sum(overall.out$dem.share*overall.out$dem.audience.share,na.rm=TRUE)
	return(visit.diff)
}

#Entire Sampling Frame for Initial Study
#Isolation is computed for two geographic units: "fips" (the county estimates)
#and "zip.final" (the zip code estimates), with and without the adjustment
full.county.adjusted <- geographic.partisan.isolation.adjusted(geo.survey,"fips")
full.zip.adjusted <- geographic.partisan.isolation.adjusted(geo.survey,"zip.final")

full.county.unadjusted <- geographic.partisan.isolation.unadjusted(geo.survey,"fips")
full.zip.unadjusted <- geographic.partisan.isolation.unadjusted(geo.survey,"zip.final")

#Subset to only Wakoopa Panelists (rows with a non-missing weight_pulse)
wakoopa.subset <- geo.survey[!is.na(geo.survey$weight_pulse),]
#Overwrite weight_full with the panel weight
#NOTE(review): presumably the geographic functions read weight_full - confirm against their definitions
wakoopa.subset$weight_full <- wakoopa.subset$weight_pulse

panel.county.adjusted <- geographic.partisan.isolation.adjusted(wakoopa.subset,"fips")
panel.zip.adjusted <- geographic.partisan.isolation.adjusted(wakoopa.subset,"zip.final")

panel.county.unadjusted <- geographic.partisan.isolation.unadjusted(wakoopa.subset,"fips")
panel.zip.unadjusted <- geographic.partisan.isolation.unadjusted(wakoopa.subset,"zip.final")

#Row order of the table: full sample (county, zip code) then panel (county, zip code)
unadjusted.geo <- c(full.county.unadjusted,full.zip.unadjusted,panel.county.unadjusted,panel.zip.unadjusted)
adjusted.geo <- c(full.county.adjusted,full.zip.adjusted,panel.county.adjusted,panel.zip.adjusted)

geo.table <- cbind.data.frame(unadjusted.geo,adjusted.geo)
geo.table$name <- c('Full-County','Full-Zip Code','Panel-County','Panel-Zip Code')
geo.table <- geo.table[,c('name','unadjusted.geo','adjusted.geo')]

#Table D2
#Printed explicitly: a bare top-level expression is only auto-printed by Rscript/R CMD BATCH,
#so without print() the table would not reach the sink log when this file is run via source()
print(xtable(geo.table))

########
###Bootstrap SES on Partisan Segregation Measures
########
#Clear every object from the workspace before the next section (attached packages stay attached)
rm(list=ls())

#Function for adjusted partisan isolation index measure
#
#Arguments:
#	dataframe - visit-level data frame with columns caseid, domain, rep.partisanship and weight
#	variable  - name of the column used to filter rows (not consulted when category is 'all')
#	category  - value of `variable` whose rows are kept, or 'all' to keep every row
#
#rep.partisanship is read as 1 = Republican, 0 = Democrat and NA = no partisan
#affiliation (matching the rep.count/dem.count/neutral.count totals built below).
#
#Returns one number: the audience-weighted average Republican share of the domains
#Republicans visit minus the same average for Democrats. Each respondent's own
#visits are removed from the domain total (and, for Republicans, from the
#Republican count) before the share is taken.
partisan.segregation.adjusted <- function(dataframe,variable,category){
	require('tidyr')
	require('plyr')

	#Keep only the requested category unless every row is wanted
	if(category=='all'){
		visits <- dataframe
	} else {
		visits <- dataframe[which(dataframe[,variable]==category),]
	}

	#Weighted visits of each respondent to each domain
	visits.by.person <- ddply(visits,.(caseid,rep.partisanship,domain),summarise,person.level.weight=sum(weight,na.rm=TRUE))

	#Domain-Level Counts of Visit Patterns
	domain.totals <- ddply(visits,.(domain),summarise,
		rep.count=sum(weight[which(rep.partisanship==1)],na.rm=TRUE),
		dem.count=sum(weight[which(rep.partisanship==0)],na.rm=TRUE),
		neutral.count=sum(weight[which(is.na(rep.partisanship))],na.rm=TRUE))

	#Share of each domain's weighted audience with a partisan affiliation; dividing the
	#partisan counts by it scales them so that together they add up to the whole audience
	domain.partisan.fraction <- with(domain.totals,(rep.count+dem.count)/(rep.count+dem.count+neutral.count))
	domain.totals$rep.count.2 <- domain.totals$rep.count/domain.partisan.fraction
	domain.totals$dem.count.2 <- domain.totals$dem.count/domain.partisan.fraction

	#Merge Domain-Level Information to Person-Domain Frame, keeping partisan respondents only
	person.domain <- merge(visits.by.person,domain.totals,by=c('domain'),all.x=TRUE)
	person.domain <- person.domain[!is.na(person.domain$rep.partisanship),]

	#total.count.1 is not read again inside this function
	person.domain$total.count.1 <- person.domain$rep.count + person.domain$dem.count
	person.domain$total.count.2 <- person.domain$rep.count.2 + person.domain$dem.count.2

	#Upweight to account for respondents without partisan affiliation
	person.domain$respondent.weight <- with(person.domain,person.level.weight/((rep.count + dem.count)/(rep.count + dem.count + neutral.count)))

	#Total upweighted visits per domain and party
	party.totals <- ddply(person.domain,.(domain,rep.partisanship),summarise,domain.party.weights=sum(respondent.weight,na.rm=TRUE))
	person.domain <- merge(person.domain,party.totals,by=c('domain','rep.partisanship'),all.x=TRUE)

	rep.rows <- which(person.domain$rep.partisanship==1)
	dem.rows <- which(person.domain$rep.partisanship==0)

	#Determine the respondent's relative contribution to the web domain's partisan audience share
	contribution <- person.domain$respondent.weight/person.domain$domain.party.weights
	person.domain$relative.weight <- NA
	person.domain$relative.weight[rep.rows] <- contribution[rep.rows]
	person.domain$relative.weight[dem.rows] <- contribution[dem.rows]

	#Compute final statistic combining these separately for Reps and Dems:
	#Republican share of the domain once the respondent's own visits are left out
	others.total <- person.domain$total.count.2 - person.domain$respondent.weight
	rep.share.seen.by.rep <- (person.domain$rep.count.2 - person.domain$respondent.weight)/others.total
	rep.share.seen.by.dem <- person.domain$rep.count.2/others.total
	person.domain$final <- NA
	person.domain$final[rep.rows] <- person.domain$relative.weight[rep.rows]*rep.share.seen.by.rep[rep.rows]
	person.domain$final[dem.rows] <- person.domain$relative.weight[dem.rows]*rep.share.seen.by.dem[dem.rows]

	#Domain-Party Measure of Partisan Web Audience
	exposure.long <- ddply(person.domain,.(domain,rep.partisanship),summarise,final.weight=sum(final,na.rm=TRUE))
	exposure.wide <- spread(exposure.long,key=rep.partisanship,value=final.weight)
	#Relies on both party codes being present so the spread columns are domain, 0, 1
	names(exposure.wide) <- c('domain','dem.share','rep.share')

	#Combine with Domain Level Measures
	domain.totals <- merge(domain.totals,exposure.wide,by=c('domain'),all.x=TRUE)
	domain.totals$rep.audience.share <- domain.totals$rep.count.2/sum(domain.totals$rep.count.2,na.rm=TRUE)
	domain.totals$dem.audience.share <- domain.totals$dem.count.2/sum(domain.totals$dem.count.2,na.rm=TRUE)

	#Final Statistic Comparing Partisan Visit Patterns
	rep.exposure <- sum(domain.totals$rep.share*domain.totals$rep.audience.share,na.rm=TRUE)
	dem.exposure <- sum(domain.totals$dem.share*domain.totals$dem.audience.share,na.rm=TRUE)

	return(rep.exposure - dem.exposure)
}

load('url.survey.wlabels.RData')

#Topic frame: rows de-duplicated on the listed columns, restricted to visits that carry a
#content label (high.level) and a panel weight; weight_pulse is renamed to weight because
#partisan.segregation.adjusted sums a column called weight
url.survey.wlabels.topic <- unique(url.survey.wlabels[,c('caseid','date','domain','rep.partisanship','high.level','weight_pulse')])
url.survey.wlabels.topic <- subset(url.survey.wlabels.topic,!is.na(url.survey.wlabels.topic$high.level) & !is.na(url.survey.wlabels.topic$weight_pulse))
names(url.survey.wlabels.topic) <- c('caseid','date','domain','rep.partisanship','high.level','weight')

#Overall frame: all weighted visits made by the respondents who appear in the topic frame
url.survey.wlabels.overall <- unique(url.survey.wlabels[,c('caseid','date','domain','rep.partisanship','weight_pulse')])
names(url.survey.wlabels.overall) <- c('caseid','date','domain','rep.partisanship','weight')
url.survey.wlabels.overall <- subset(url.survey.wlabels.overall,url.survey.wlabels.overall$caseid %in% unique(url.survey.wlabels.topic$caseid))
url.survey.wlabels.overall <- subset(url.survey.wlabels.overall,!is.na(url.survey.wlabels.overall$weight))

#Election frame: labelled, weighted visits with the label dropped before de-duplication
url.survey.wlabels.election <- subset(url.survey.wlabels,!is.na(url.survey.wlabels$high.level) & !is.na(url.survey.wlabels$weight_pulse))
url.survey.wlabels.election <- unique(url.survey.wlabels.election[,c('caseid','date','domain','rep.partisanship','weight_pulse')])
names(url.survey.wlabels.election) <- c('caseid','date','domain','rep.partisanship','weight')

#Isolation index for, in this order: all visits, all election visits, Clinton Scandal,
#Trump Scandal, Issue, Strategy, Event. Used for the point estimates and for every replicate.
content.isolation <- function(overall.frame,election.frame,topic.frame){
	c(partisan.segregation.adjusted(overall.frame,NA,'all'),
		partisan.segregation.adjusted(election.frame,NA,'all'),
		partisan.segregation.adjusted(topic.frame,'high.level','Clinton Scandal'),
		partisan.segregation.adjusted(topic.frame,'high.level','Trump Scandal'),
		partisan.segregation.adjusted(topic.frame,'high.level','Issue'),
		partisan.segregation.adjusted(topic.frame,'high.level','Strategy'),
		partisan.segregation.adjusted(topic.frame,'high.level','Event'))
}

#Point estimates on the observed data
point.isolation <- content.isolation(url.survey.wlabels.overall,url.survey.wlabels.election,url.survey.wlabels.topic)
all.isolation <- point.isolation[1]
allelection.isolation <- point.isolation[2]
clinton.scandal.isolation <- point.isolation[3]
trump.scandal.isolation <- point.isolation[4]
issue.isolation <- point.isolation[5]
strategy.isolation <- point.isolation[6]
event.isolation <- point.isolation[7]

#ID list for cluster bootstrap for each data frame: row numbers keyed by caseid (as character).
#split() builds each index in one pass instead of re-scanning the frame once per respondent.
id.list.overall <- split(seq_len(nrow(url.survey.wlabels.overall)),as.character(url.survey.wlabels.overall$caseid))
id.list.election <- split(seq_len(nrow(url.survey.wlabels.election)),as.character(url.survey.wlabels.election$caseid))
id.list.topic <- split(seq_len(nrow(url.survey.wlabels.topic)),as.character(url.survey.wlabels.topic$caseid))

#Respondent ids to draw from, in order of first appearance (the order determines which id a given draw selects)
ids.overall <- as.character(unique(url.survey.wlabels.overall$caseid))
ids.election <- as.character(unique(url.survey.wlabels.election$caseid))
ids.topic <- as.character(unique(url.survey.wlabels.topic$caseid))

#One row per bootstrap replicate, one column per estimate
n.boot <- 1000
boot.isolation <- matrix(NA_real_,nrow=n.boot,ncol=7)
colnames(boot.isolation) <- c('all.isolation.se','allelection.isolation.se','clinton.scandal.isolation.se','trump.scandal.isolation.se','issue.isolation.se','strategy.isolation.se','event.isolation.se')

set.seed(seed=95)
for(k in seq_len(n.boot)){
	#Draw respondents with replacement; the three draws are made in this fixed order
	#so the random number stream is the same on every run.
	#NOTE(review): the three frames are resampled independently, so the differences taken
	#below are not paired on the same respondents - confirm this is intended
	keep.ids.overall <- sample(x=ids.overall, size=length(ids.overall), replace=TRUE)
	keep.ids.election <- sample(x=ids.election, size=length(ids.election), replace=TRUE)
	keep.ids.topic <- sample(x=ids.topic, size=length(ids.topic), replace=TRUE)

	#All rows of every drawn respondent, repeated once per time the respondent was drawn
	sample.rows.overall <- unlist(id.list.overall[keep.ids.overall],use.names=FALSE)
	sample.rows.election <- unlist(id.list.election[keep.ids.election],use.names=FALSE)
	sample.rows.topic <- unlist(id.list.topic[keep.ids.topic],use.names=FALSE)

	newframe.overall <- url.survey.wlabels.overall[sample.rows.overall,]
	newframe.election <- url.survey.wlabels.election[sample.rows.election,]
	newframe.topic <- url.survey.wlabels.topic[sample.rows.topic,]

	boot.isolation[k,] <- content.isolation(newframe.overall,newframe.election,newframe.topic)
}

#Replicates of each content-type estimate minus the all-visits estimate
se.frame.diff <- boot.isolation[,-1] - boot.isolation[,1]
colnames(se.frame.diff) <- c('election.all.diff.se','clinton.all.diff.se','trump.all.diff.se','issue.all.diff.se','strategy.all.diff.se','event.all.diff.se')
boot.diff.ses <- apply(X=se.frame.diff,MARGIN=2,FUN=quantile,probs=c(.025,.975))

#Observed differences, same order as the columns of se.frame.diff
topic.difference <- point.isolation[-1] - point.isolation[1]

#Interval limits are 2*estimate minus a bootstrap quantile: subtracting the 2.5% quantile
#gives the upper limit and subtracting the 97.5% quantile gives the lower limit
upper.adjusted.diff <- 2*topic.difference-boot.diff.ses[1,]
lower.adjusted.diff <- 2*topic.difference-boot.diff.ses[2,]
#Row 1 = lower limit, row 2 = upper limit
final.boot.ses.diff <- rbind(unlist(lower.adjusted.diff),unlist(upper.adjusted.diff))
content.specific.diff.ses.boot <- final.boot.ses.diff
save(content.specific.diff.ses.boot,file='content.specific.diff.ses.boot.RData')

#Bootstrap quantiles of the estimates themselves
boot.ses <- apply(X=boot.isolation,MARGIN=2,FUN=quantile,probs=c(.025,.975))

outcome <- rbind.data.frame(all.isolation,allelection.isolation,clinton.scandal.isolation,trump.scandal.isolation,issue.isolation,strategy.isolation,event.isolation)
upper.adjusted <- 2*outcome-boot.ses[1,]
lower.adjusted <- 2*outcome-boot.ses[2,]
#Row 1 = upper limit, row 2 = lower limit (the reverse of content.specific.diff.ses.boot)
final.boot.ses <- rbind(unlist(upper.adjusted),unlist(lower.adjusted))

names(outcome) <- c('isolation')
rownames(outcome) <- c('All Visits','Election Visits','Clinton Scandal','Trump Scandal','Issue','Strategy','Event')
#Plot order: the two aggregate rows first, then the content categories, each group by increasing isolation
outcome$order <- c(1,1,2,2,2,2,2)
order.cols <- order(outcome$order,outcome$isolation)
outcome <- outcome[order.cols,]
final.boot.ses <- final.boot.ses[,order.cols]

#Figure 2: point estimates with bootstrap intervals; the dashed line separates the aggregate rows from the content categories
pdf(file='figure-2.pdf',height=4,width=5.2)
par(mar=c(4,7,2,2))
plot(y=7:1,x=outcome$isolation,pch=16,yaxt='n',ylab='',xlim=c(.15,.65),xlab="Isolation Index",main="",cex=1.8)
abline(h=5.5,lty=2)
axis(side=2,at=7:1,labels=rownames(outcome),las=1)
segments(y0=7:1,y1=7:1,x0=unlist(final.boot.ses[1,]),x1=unlist(final.boot.ses[2,]),lwd=2)
dev.off()

content.specific.ses.boot <- final.boot.ses 
save(content.specific.ses.boot,file='content.specific.ses.boot.RData')

###
##SLANT
###
#Clear every object from the workspace before the next section (attached packages stay attached)
rm(list=ls())

#Function for adjusted partisan isolation index measure
#
#Arguments:
#	dataframe - visit-level data frame with columns caseid, domain, rep.partisanship and weight
#	variable  - name of the column used to filter rows (not consulted when category is 'all')
#	category  - value of `variable` whose rows are kept, or 'all' to keep every row
#
#rep.partisanship is read as 1 = Republican, 0 = Democrat and NA = no partisan
#affiliation (matching the rep.count/dem.count/neutral.count totals built below).
#
#Returns one number: the audience-weighted average Republican share of the domains
#Republicans visit minus the same average for Democrats. Each respondent's own
#visits are removed from the domain total (and, for Republicans, from the
#Republican count) before the share is taken.
partisan.segregation.adjusted <- function(dataframe,variable,category){
	require('tidyr')
	require('plyr')

	#Keep only the requested category unless every row is wanted
	if(category=='all'){
		visits <- dataframe
	} else {
		visits <- dataframe[which(dataframe[,variable]==category),]
	}

	#Weighted visits of each respondent to each domain
	visits.by.person <- ddply(visits,.(caseid,rep.partisanship,domain),summarise,person.level.weight=sum(weight,na.rm=TRUE))

	#Domain-Level Counts of Visit Patterns
	domain.totals <- ddply(visits,.(domain),summarise,
		rep.count=sum(weight[which(rep.partisanship==1)],na.rm=TRUE),
		dem.count=sum(weight[which(rep.partisanship==0)],na.rm=TRUE),
		neutral.count=sum(weight[which(is.na(rep.partisanship))],na.rm=TRUE))

	#Share of each domain's weighted audience with a partisan affiliation; dividing the
	#partisan counts by it scales them so that together they add up to the whole audience
	domain.partisan.fraction <- with(domain.totals,(rep.count+dem.count)/(rep.count+dem.count+neutral.count))
	domain.totals$rep.count.2 <- domain.totals$rep.count/domain.partisan.fraction
	domain.totals$dem.count.2 <- domain.totals$dem.count/domain.partisan.fraction

	#Merge Domain-Level Information to Person-Domain Frame, keeping partisan respondents only
	person.domain <- merge(visits.by.person,domain.totals,by=c('domain'),all.x=TRUE)
	person.domain <- person.domain[!is.na(person.domain$rep.partisanship),]

	#total.count.1 is not read again inside this function
	person.domain$total.count.1 <- person.domain$rep.count + person.domain$dem.count
	person.domain$total.count.2 <- person.domain$rep.count.2 + person.domain$dem.count.2

	#Upweight to account for respondents without partisan affiliation
	person.domain$respondent.weight <- with(person.domain,person.level.weight/((rep.count + dem.count)/(rep.count + dem.count + neutral.count)))

	#Total upweighted visits per domain and party
	party.totals <- ddply(person.domain,.(domain,rep.partisanship),summarise,domain.party.weights=sum(respondent.weight,na.rm=TRUE))
	person.domain <- merge(person.domain,party.totals,by=c('domain','rep.partisanship'),all.x=TRUE)

	rep.rows <- which(person.domain$rep.partisanship==1)
	dem.rows <- which(person.domain$rep.partisanship==0)

	#Determine the respondent's relative contribution to the web domain's partisan audience share
	contribution <- person.domain$respondent.weight/person.domain$domain.party.weights
	person.domain$relative.weight <- NA
	person.domain$relative.weight[rep.rows] <- contribution[rep.rows]
	person.domain$relative.weight[dem.rows] <- contribution[dem.rows]

	#Compute final statistic combining these separately for Reps and Dems:
	#Republican share of the domain once the respondent's own visits are left out
	others.total <- person.domain$total.count.2 - person.domain$respondent.weight
	rep.share.seen.by.rep <- (person.domain$rep.count.2 - person.domain$respondent.weight)/others.total
	rep.share.seen.by.dem <- person.domain$rep.count.2/others.total
	person.domain$final <- NA
	person.domain$final[rep.rows] <- person.domain$relative.weight[rep.rows]*rep.share.seen.by.rep[rep.rows]
	person.domain$final[dem.rows] <- person.domain$relative.weight[dem.rows]*rep.share.seen.by.dem[dem.rows]

	#Domain-Party Measure of Partisan Web Audience
	exposure.long <- ddply(person.domain,.(domain,rep.partisanship),summarise,final.weight=sum(final,na.rm=TRUE))
	exposure.wide <- spread(exposure.long,key=rep.partisanship,value=final.weight)
	#Relies on both party codes being present so the spread columns are domain, 0, 1
	names(exposure.wide) <- c('domain','dem.share','rep.share')

	#Combine with Domain Level Measures
	domain.totals <- merge(domain.totals,exposure.wide,by=c('domain'),all.x=TRUE)
	domain.totals$rep.audience.share <- domain.totals$rep.count.2/sum(domain.totals$rep.count.2,na.rm=TRUE)
	domain.totals$dem.audience.share <- domain.totals$dem.count.2/sum(domain.totals$dem.count.2,na.rm=TRUE)

	#Final Statistic Comparing Partisan Visit Patterns
	rep.exposure <- sum(domain.totals$rep.share*domain.totals$rep.audience.share,na.rm=TRUE)
	dem.exposure <- sum(domain.totals$dem.share*domain.totals$dem.audience.share,na.rm=TRUE)

	return(rep.exposure - dem.exposure)
}

load('url.survey.wlabels.RData')

#Slant frame: rows de-duplicated on the listed columns, restricted to visits with a slant rating;
#weight_pulse is renamed to weight because partisan.segregation.adjusted sums a column called weight
url.survey.wlabels.slant <- unique(url.survey.wlabels[,c('caseid','date','domain','rep.partisanship','slant.rating','weight_pulse')])
url.survey.wlabels.slant <- url.survey.wlabels.slant[which(!is.na(url.survey.wlabels.slant$slant.rating)),]
names(url.survey.wlabels.slant) <- c('caseid','date','domain','rep.partisanship','slant.rating','weight')

#Point estimates on the observed data
neutral.isolation <- partisan.segregation.adjusted(url.survey.wlabels.slant,'slant.rating','Neutral')
moderate.isolation <- partisan.segregation.adjusted(url.survey.wlabels.slant,'slant.rating','Moderate')
high.isolation <- partisan.segregation.adjusted(url.survey.wlabels.slant,'slant.rating','High')
outcome.slant <- c(neutral.isolation,moderate.isolation,high.isolation)

#Bootstrap replicates, allocated up front rather than grown inside the loop
n.boot <- 1000
neutral.isolation.se <- rep(NA_real_,n.boot)
moderate.isolation.se <- rep(NA_real_,n.boot)
high.isolation.se <- rep(NA_real_,n.boot)

#Row numbers of the slant frame keyed by caseid (as character).
#split() builds the index in one pass instead of re-scanning the frame once per respondent.
id.list <- split(seq_len(nrow(url.survey.wlabels.slant)),as.character(url.survey.wlabels.slant$caseid))

#Function to return the rows associated with a given id number for respondent cluster bootstrap
out.rows <- function(entry){
	return(unlist(id.list[[entry]]))
}

#Respondent ids to draw from, in order of first appearance (the order determines which id a given draw selects)
slant.ids <- as.character(unique(url.survey.wlabels.slant$caseid))

set.seed(seed=90)
for(k in seq_len(n.boot)){
	#Draw respondents with replacement and collect all rows of every drawn respondent
	keep.ids <- sample(x=slant.ids, size=length(slant.ids), replace=TRUE)
	sample.rows <- unlist(lapply(keep.ids,out.rows))

	#The same resampled frame is used for all three slant levels
	boot.frame <- url.survey.wlabels.slant[sample.rows,]
	neutral.seg <- partisan.segregation.adjusted(boot.frame,'slant.rating','Neutral')
	moderate.seg <- partisan.segregation.adjusted(boot.frame,'slant.rating','Moderate')
	high.seg <- partisan.segregation.adjusted(boot.frame,'slant.rating','High')

	neutral.isolation.se[k] <- neutral.seg
	moderate.isolation.se[k] <- moderate.seg
	high.isolation.se[k] <- high.seg
}

#Replicates of the pairwise differences between slant levels
diff.se.mod <- moderate.isolation.se - neutral.isolation.se
diff.se.high <- high.isolation.se - neutral.isolation.se
diff.se.high.mod <- high.isolation.se - moderate.isolation.se

#Observed differences
mod.diff <- moderate.isolation - neutral.isolation
high.diff <- high.isolation - neutral.isolation
high.mod.diff <- high.isolation - moderate.isolation

#Interval limits are 2*estimate minus a bootstrap quantile. Element 1 subtracts the 2.5%
#quantile and is therefore the UPPER limit; element 2 subtracts the 97.5% quantile and is
#the LOWER limit. The "2.5%"/"97.5%" names carried over from quantile() refer to the
#quantile that was subtracted, not to the limit itself.
mod.quantiles <- quantile(diff.se.mod,probs=c(.025,.975))
high.quantiles <- quantile(diff.se.high,probs=c(.025,.975))
high.mod.quantiles <- quantile(diff.se.high.mod,probs=c(.025,.975))

mod.diff.ci <- c(2*mod.diff-mod.quantiles[1],2*mod.diff-mod.quantiles[2])
high.diff.ci <- c(2*high.diff-high.quantiles[1],2*high.diff-high.quantiles[2])
high.mod.diff.ci <- c(2*high.mod.diff-high.mod.quantiles[1],2*high.mod.diff-high.mod.quantiles[2])

#Figure 3: differences relative to neutral content with bootstrap intervals; dashed line marks zero
pdf(file='figure-3.pdf',height=4,width=5.2)
par(mar=c(4,5,2,2))
plot(y=2:1,x=c(mod.diff,high.diff),pch=16,yaxt='n',ylab='',xlim=c(-.01,.15),xlab="Difference in Isolation Index (Relative to Neutral) ",main="",cex=1.8)
axis(side=2,at=c(2,1),labels=c("Moderate","High"),las=1)
abline(v=0,lty=2)
segments(y0=c(2,1),y1=c(2,1),x0=c(mod.diff.ci[1],high.diff.ci[1]),x1=c(mod.diff.ci[2],high.diff.ci[2]),lwd=2)
dev.off()

#One row per comparison: moderate-neutral, high-neutral, high-moderate
difference.slant.cis <- rbind.data.frame(mod.diff.ci,high.diff.ci,high.mod.diff.ci)
save(difference.slant.cis,file='difference.slant.cis.RData')

#Stop diverting output to the log file
sink()